{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>School</th>\n",
       "      <th>Class</th>\n",
       "      <th>ID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Address</th>\n",
       "      <th>Height</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Math</th>\n",
       "      <th>Physics</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1101</td>\n",
       "      <td>M</td>\n",
       "      <td>street_1</td>\n",
       "      <td>173</td>\n",
       "      <td>63</td>\n",
       "      <td>34.0</td>\n",
       "      <td>A+</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1102</td>\n",
       "      <td>F</td>\n",
       "      <td>street_2</td>\n",
       "      <td>192</td>\n",
       "      <td>73</td>\n",
       "      <td>32.5</td>\n",
       "      <td>B+</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1103</td>\n",
       "      <td>M</td>\n",
       "      <td>street_2</td>\n",
       "      <td>186</td>\n",
       "      <td>82</td>\n",
       "      <td>87.2</td>\n",
       "      <td>B+</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1104</td>\n",
       "      <td>F</td>\n",
       "      <td>street_2</td>\n",
       "      <td>167</td>\n",
       "      <td>81</td>\n",
       "      <td>80.4</td>\n",
       "      <td>B-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>S_1</td>\n",
       "      <td>C_1</td>\n",
       "      <td>1105</td>\n",
       "      <td>F</td>\n",
       "      <td>street_4</td>\n",
       "      <td>159</td>\n",
       "      <td>64</td>\n",
       "      <td>84.8</td>\n",
       "      <td>B+</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  School Class    ID Gender   Address  Height  Weight  Math Physics\n",
       "0    S_1   C_1  1101      M  street_1     173      63  34.0      A+\n",
       "1    S_1   C_1  1102      F  street_2     192      73  32.5      B+\n",
       "2    S_1   C_1  1103      M  street_2     186      82  87.2      B+\n",
       "3    S_1   C_1  1104      F  street_2     167      81  80.4      B-\n",
       "4    S_1   C_1  1105      F  street_4     159      64  84.8      B+"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "df = pd.read_csv('data/table.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "append方法 主要是用于对行进行添加\n",
    "1 行添加 必须要指定行name\n",
    "2 表添加"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>new_row</th>\n",
       "      <td>F</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Gender  Height\n",
       "0            M     173\n",
       "1            F     192\n",
       "2            M     186\n",
       "3            F     167\n",
       "new_row      F     188"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_append = df.loc[:3,['Gender','Height']].copy()\n",
    "s = pd.Series({'Gender':'F','Height':188},name='new_row')\n",
    "df_append.append(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>new_1</th>\n",
       "      <td>F</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>new_2</th>\n",
       "      <td>M</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Gender  Height\n",
       "0          M     173\n",
       "1          F     192\n",
       "2          M     186\n",
       "3          F     167\n",
       "new_1      F     188\n",
       "new_2      M     176"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp = pd.DataFrame({'Gender':['F','M'],'Height':[188,176]},index=['new_1','new_2'])\n",
    "df_append.append(df_temp)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "assign主要是用于添加列 列名直接由参数指定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "      <th>Letter</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Gender  Height Letter\n",
       "0      M     173      a\n",
       "1      F     192      b\n",
       "2      M     186      c\n",
       "3      F     167      d"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "s = pd.Series(list('abcd'),index=range(4))\n",
    "df_append.assign(Letter=s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "      <th>col1</th>\n",
       "      <th>col2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>173</td>\n",
       "      <td>MM</td>\n",
       "      <td>a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>192</td>\n",
       "      <td>FF</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>186</td>\n",
       "      <td>MM</td>\n",
       "      <td>c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>167</td>\n",
       "      <td>FF</td>\n",
       "      <td>d</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Gender  Height col1 col2\n",
       "0      M     173   MM    a\n",
       "1      F     192   FF    b\n",
       "2      M     186   MM    c\n",
       "3      F     167   FF    d"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#一次添加多列\n",
    "df_append.assign(col1=lambda x:x['Gender']*2,\n",
    "                 col2=s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0       M\n",
      "1       F\n",
      "10    NaN\n",
      "11    NaN\n",
      "Name: Gender, dtype: object 0     NaN\n",
      "1     NaN\n",
      "10      M\n",
      "11      F\n",
      "Name: Gender, dtype: object\n",
      "0     173.0\n",
      "1     192.0\n",
      "10      NaN\n",
      "11      NaN\n",
      "Name: Height, dtype: float64 0       NaN\n",
      "1       NaN\n",
      "10    161.0\n",
      "11    175.0\n",
      "Name: Height, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Gender</th>\n",
       "      <th>Height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Gender Height\n",
       "0     NaN    NaN\n",
       "1     NaN    NaN\n",
       "10    NaN    NaN\n",
       "11    NaN    NaN"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#combine函数 \n",
    "df_combine_1 = df.loc[:1,['Gender','Height']].copy() \n",
    "df_combine_2 = df.loc[10:11,['Gender','Height']].copy()\n",
    "#df_combine_1.combine(df_combine_2)\n",
    "df_combine_1.combine(df_combine_2,lambda x,y:print(x,y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B\n",
       "0  8  6\n",
       "1  7  5"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#根据列均值大小进行填充 x y 分别代表df1 df2 的第一列 第二列\n",
    "df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})\n",
    "df2 = pd.DataFrame({'A': [8, 7], 'B': [6, 5]})\n",
    "df1.combine(df2,lambda x,y:x if x.mean()>y.mean() else y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>7</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A  B    C\n",
       "0 NaN  8  6.0\n",
       "1 NaN  7  5.0"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#索引对齐特性（既指行索引也指列索引）（默认状态下，后面的表没有的行列都会设置为NaN）¶\n",
    "df2 = pd.DataFrame({'B': [8, 7], 'C': [6, 5]})\n",
    "df1.combine(df2,lambda x,y:x if x.mean()>y.mean() else y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B\n",
       "0  1.0  3.0\n",
       "1  0.0  4.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#combine_first方法 df2填补df1的缺失值\n",
    "df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]})\n",
    "df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})\n",
    "df1.combine_first(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B    C\n",
       "0  NaN  4.0  NaN\n",
       "1  0.0  3.0  1.0\n",
       "2  NaN  3.0  1.0"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#同时可以指定索引 \n",
    "df1 = pd.DataFrame({'A': [None, 0], 'B': [4, None]})\n",
    "df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2])\n",
    "df1.combine_first(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A  B\n",
       "0  1  4\n",
       "1  2  5\n",
       "2  3  6"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#update方法 默认使用左连接\n",
    "df1 = pd.DataFrame({'A': [1, 2, 3],\n",
    "                    'B': [400, 500, 600]})\n",
    "df2 = pd.DataFrame({'B': [4, 5, 6],\n",
    "                    'C': [7, 8, 9]})\n",
    "df1.update(df2)\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>500.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A      B\n",
       "0  1    4.0\n",
       "1  2  500.0\n",
       "2  3    6.0"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#update属性 缺失值不会填充 还是按照第一个的值\n",
    "df1 = pd.DataFrame({'A': [1, 2, 3],\n",
    "                    'B': [400, 500, 600]})\n",
    "df2 = pd.DataFrame({'B': [4, np.nan, 6]})\n",
    "df1.update(df2)\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "#concat默认使用纵向连接 axis = 0\n",
    "df1 = pd.DataFrame({'A': ['A0', 'A1'],\n",
    "                    'B': ['B0', 'B1']},\n",
    "                    index = [0,1])\n",
    "df2 = pd.DataFrame({'A': ['A2', 'A3'],\n",
    "                    'B': ['B2', 'B3']},\n",
    "                    index = [2,3])\n",
    "df3 = pd.DataFrame({'A': ['A1', 'A3'],\n",
    "                    'D': ['D1', 'D3'],\n",
    "                    'E': ['E1', 'E3']},\n",
    "                    index = [1,3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B\n",
       "0  A0  B0\n",
       "1  A1  B1\n",
       "2  A2  B2\n",
       "3  A3  B3"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B    A    B\n",
       "0   A0   B0  NaN  NaN\n",
       "1   A1   B1  NaN  NaN\n",
       "2  NaN  NaN   A2   B2\n",
       "3  NaN  NaN   A3   B3"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2],axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A\n",
       "1  A1\n",
       "3  A3\n",
       "0  A0\n",
       "1  A1"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#join设置为内连接（由于axis=0，因此列取交集） 只是看列名的交集 并不是列里面值的交集、\n",
    "pd.concat([df3,df1],join='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>D</th>\n",
       "      <th>E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>D1</td>\n",
       "      <td>E1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>D3</td>\n",
       "      <td>E3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A    B    D    E\n",
       "1  A1  NaN   D1   E1\n",
       "3  A3  NaN   D3   E3\n",
       "0  A0   B0  NaN  NaN\n",
       "1  A1   B1  NaN  NaN"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#join设置为外链接 里面的sort函数设置列排序\n",
    "pd.concat([df3,df1],join='outer',sort=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B\n",
       "0  A0  B0\n",
       "1  A1  B1"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B\n",
       "0  A0  B0\n",
       "1  A1  B1\n",
       "2  A2  B2\n",
       "3  A3  B3"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">x</th>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">y</th>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A   B\n",
       "x 0  A0  B0\n",
       "  1  A1  B1\n",
       "y 2  A2  B2\n",
       "  3  A3  B3"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#key参数用于对不同的数据框增加一个标号，便于索引\n",
    "#keys相当于设置表的title\n",
    "pd.concat([df1,df2], keys=['x', 'y'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "#merge/join与concat的不同之处在于on参数，可以指定某一个对象为key来进行连接\n",
    "left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],\n",
    "                     'key2': ['K0', 'K1', 'K0', 'K1'],\n",
    "                      'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                      'B': ['B0', 'B1', 'B2', 'B3']}) \n",
    "right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],\n",
    "                      'key2': ['K0', 'K0', 'K0', 'K0'],\n",
    "                      'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                      'D': ['D0', 'D1', 'D2', 'D3']})\n",
    "right2 = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],\n",
    "                      'key2': ['K0', 'K0', 'K0', 'K0'],\n",
    "                      'C': ['C0', 'C1', 'C2', 'C3']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2_x</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key2_y</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K0</td>\n",
       "      <td>K1</td>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>K2</td>\n",
       "      <td>K1</td>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K0</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2_x   A   B key2_y   C   D\n",
       "0   K0     K0  A0  B0     K0  C0  D0\n",
       "1   K0     K1  A1  B1     K0  C0  D0\n",
       "2   K1     K0  A2  B2     K0  C1  D1\n",
       "3   K1     K0  A2  B2     K0  C2  D2\n",
       "4   K2     K1  A3  B3     K0  C3  D3"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#返回的是左右两边都存在时发的结果 merge默认使用的是内连接 完全重复的不会返回？ \n",
    "pd.merge(left, right, on='key1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K0</td>\n",
       "      <td>K1</td>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K2</td>\n",
       "      <td>K1</td>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2   A   B\n",
       "0   K0   K0  A0  B0\n",
       "1   K0   K1  A1  B1\n",
       "2   K1   K0  A2  B2\n",
       "3   K2   K1  A3  B3"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2   C   D\n",
       "0   K0   K0  C0  D0\n",
       "1   K1   K0  C1  D1\n",
       "2   K1   K0  C2  D2\n",
       "3   K2   K0  C3  D3"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "right"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  key1 key2   A   B   C   D\n",
       "0   K0   K0  A0  B0  C0  D0\n",
       "1   K1   K0  A2  B2  C1  D1\n",
       "2   K1   K0  A2  B2  C2  D2"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, on=['key1','key2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "right = pd.DataFrame({'A': [4, 5, 6], 'B': [2, 3, 4]})\n",
    "left = pd.DataFrame({'A': [1, 2], 'B': [2, 1]})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "validate检验的是到底哪一边出现了重复索引，如果是“one_to_one”则两侧索引都是唯一，如果\"one_to_many\"则左侧唯一"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A_x</th>\n",
       "      <th>B</th>\n",
       "      <th>A_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   A_x  B  A_y\n",
       "0  1.0  2  4.0\n",
       "1  2.0  1  NaN\n",
       "2  NaN  3  5.0\n",
       "3  NaN  4  6.0"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left = pd.DataFrame({'A': [1, 2], 'B': [2, 1]})\n",
    "pd.merge(left, right, on='B', how = 'outer',validate='one_to_one')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col1</th>\n",
       "      <th>col_left</th>\n",
       "      <th>col_right</th>\n",
       "      <th>_merge</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>a</td>\n",
       "      <td>NaN</td>\n",
       "      <td>left_only</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>b</td>\n",
       "      <td>2.0</td>\n",
       "      <td>both</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>right_only</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>right_only</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col1 col_left  col_right      _merge\n",
       "0     0        a        NaN   left_only\n",
       "1     1        b        2.0        both\n",
       "2     2      NaN        2.0  right_only\n",
       "3     2      NaN        2.0  right_only"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#indicator参数指示了，合并后该行索引的来源\n",
    "df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})\n",
    "df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})\n",
    "pd.merge(df1, df2, on='col1', how='outer', indicator=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>K0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A   B    C    D\n",
       "K0  A0  B0   C0   D0\n",
       "K1  A1  B1  NaN  NaN\n",
       "K2  A2  B2   C2   D2"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#join函数默认使用的是左连接\n",
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
    "                     'B': ['B0', 'B1', 'B2']},\n",
    "                    index=['K0', 'K1', 'K2'])\n",
    "right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],\n",
    "                      'D': ['D0', 'D2', 'D3']},\n",
    "                    index=['K0', 'K2', 'K3'])\n",
    "left.join(right)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key   C   D\n",
       "0  A0  B0  K0  C0  D0\n",
       "1  A1  B1  K1  C1  D1\n",
       "2  A2  B2  K0  C0  D0\n",
       "3  A3  B3  K1  C1  D1"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对于多对一模式下的合并 使用join比较方便 因为其默认使用左连接？\n",
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                     'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                     'key': ['K0', 'K1', 'K0', 'K1']})\n",
    "right = pd.DataFrame({'C': ['C0', 'C1'],\n",
    "                      'D': ['D0', 'D1']},\n",
    "                     index=['K0', 'K1'])\n",
    "left.join(right, on='key')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                     'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                     'key1': ['K0', 'K0', 'K1', 'K2'],\n",
    "                     'key2': ['K0', 'K1', 'K0', 'K1']})\n",
    "index = pd.MultiIndex.from_tuples([('K0', 'K0'), ('K1', 'K0'),\n",
    "                                   ('K2', 'K0'), ('K2', 'K1')],names=['key1','key2'])\n",
    "right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                      'D': ['D0', 'D1', 'D2', 'D3']}, index=index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>K0</th>\n",
       "      <th>K0</th>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K1</th>\n",
       "      <th>K0</th>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">K2</th>\n",
       "      <th>K0</th>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K1</th>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            C   D\n",
       "key1 key2        \n",
       "K0   K0    C0  D0\n",
       "K1   K0    C1  D1\n",
       "K2   K0    C2  D2\n",
       "     K1    C3  D3"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "right"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "练习题 练习一"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "#这两张表既有重复的元素 也有不重复的 总共80个员工\n",
    "df1 = pd.read_csv('data/Employee1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = pd.read_csv('data/Employee2.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "问题一：每个公司有多少员工满足如下条件：既出现第一张表，又出现在第二张表。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['c12',\n",
       " 'b7',\n",
       " 'e10',\n",
       " 'c13',\n",
       " 'a1',\n",
       " 'b1',\n",
       " 'd10',\n",
       " 'a3',\n",
       " 'b15',\n",
       " 'a6',\n",
       " 'c3',\n",
       " 'e11',\n",
       " 'e8',\n",
       " 'c10',\n",
       " 'd5',\n",
       " 'b3']"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#使用到了集合 还有intersection函数\n",
    "L = list(set(df1['Name']).intersection(set(df2['Name'])))\n",
    "L"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "问题二:将所有不符合(a)中条件的行筛选出来，合并为一张新表，列名与原表一致。¶"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_b1 = df1[~df1['Name'].isin(L)]\n",
    "df_b2 = df2[~df2['Name'].isin(L)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_b = pd.concat([df_b1,df_b2]).set_index('Name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Company</th>\n",
       "      <th>Age</th>\n",
       "      <th>Height</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Salary</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a4</th>\n",
       "      <td>A</td>\n",
       "      <td>43</td>\n",
       "      <td>158</td>\n",
       "      <td>62.5</td>\n",
       "      <td>21755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a7</th>\n",
       "      <td>A</td>\n",
       "      <td>49</td>\n",
       "      <td>171</td>\n",
       "      <td>94.6</td>\n",
       "      <td>6177</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a8</th>\n",
       "      <td>A</td>\n",
       "      <td>51</td>\n",
       "      <td>168</td>\n",
       "      <td>89.5</td>\n",
       "      <td>3246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a9</th>\n",
       "      <td>A</td>\n",
       "      <td>36</td>\n",
       "      <td>186</td>\n",
       "      <td>62.8</td>\n",
       "      <td>3569</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a13</th>\n",
       "      <td>A</td>\n",
       "      <td>58</td>\n",
       "      <td>190</td>\n",
       "      <td>75.9</td>\n",
       "      <td>21854</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Company  Age  Height  Weight  Salary\n",
       "Name                                     \n",
       "a4         A   43     158    62.5   21755\n",
       "a7         A   49     171    94.6    6177\n",
       "a8         A   51     168    89.5    3246\n",
       "a9         A   36     186    62.8    3569\n",
       "a13        A   58     190    75.9   21854"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_b.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(45, 5)"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_b.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "练习二："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.read_csv('data/Course1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = pd.read_csv('data/Course2.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "问题一：将两张表分别拆分为专业课和非专业课\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = [\"学科基础课\",\"专业必修课\",\"专业选修课\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_a11 = df1[df1[\"课程类别\"].isin(a)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_a12 = df1[~df1[\"课程类别\"].isin(a)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_a21 = df2[df2[\"课程类别\"].isin(a)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_a22 = df2[~df2[\"课程类别\"].isin(a)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "问题二：将两张专业课的分数表和两张非专业课的分数表分别合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "common = pd.concat([df_a11,df_a21])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>课程名字</th>\n",
       "      <th>课程类别</th>\n",
       "      <th>学分</th>\n",
       "      <th>分数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>云计算应用与开发</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>96.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>社会计算</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>78.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>深度学习</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>75.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>人工智能导论</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>84.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>数据结构与算法</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>5</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>开源软件设计与开发</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>81.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>大数据应用案例</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>电子商务</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>74.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>高等数学（二）</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>4</td>\n",
       "      <td>92.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>计算机视觉</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>离散数学</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>4</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>数据科学的数学基础</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>75.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>程序设计</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>4</td>\n",
       "      <td>84.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>线性代数</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>3</td>\n",
       "      <td>97.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>机器学习</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>88.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>位置服务</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>87.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>数据挖掘</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>71.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>概率图模型</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>96.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>高等统计选讲</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>87.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>推荐系统</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>97.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>智慧城市</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>77.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>高等数学（一）</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>4</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>数据科学与工程导论</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>专业英语</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>概率论</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>3</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>计算机系统</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>数据科学与工程算法</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>操作系统</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>89.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>分布式模型与编程</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>81.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>数据管理系统</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>6</td>\n",
       "      <td>70.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>统计与数据分析基础</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>商业分析</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>70.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>数据伦理</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>设计思维</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>73.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>统计分析软件（R语言）</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>83.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>软件工程</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>数据仓库与OLAP</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>94.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>数据库系统实现</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>98.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>大数据处理工具</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>73.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>统计推断</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>75.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>信息检索</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>93.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>信息可视化</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>93.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>自然语言处理</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>80.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>统计学习</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>81.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>计算广告</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>前端设计与开发</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>90.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           课程名字   课程类别  学分     分数\n",
       "1      云计算应用与开发  专业选修课   3   96.0\n",
       "2          社会计算  专业选修课   3   78.0\n",
       "3          深度学习  专业选修课   3   75.0\n",
       "4        人工智能导论  专业必修课   3   84.0\n",
       "6       数据结构与算法  学科基础课   5   82.0\n",
       "7     开源软件设计与开发  专业选修课   3   81.0\n",
       "9       大数据应用案例  专业必修课   3   91.0\n",
       "11         电子商务  专业选修课   3   74.0\n",
       "12      高等数学（二）  学科基础课   4   92.0\n",
       "14        计算机视觉  专业选修课   3    NaN\n",
       "15         离散数学  学科基础课   4   82.0\n",
       "16    数据科学的数学基础  专业必修课   4   75.0\n",
       "17         程序设计  学科基础课   4   84.0\n",
       "19         线性代数  学科基础课   3   97.0\n",
       "21         机器学习  专业选修课   3   88.0\n",
       "22         位置服务  专业选修课   3   87.0\n",
       "26         数据挖掘  专业必修课   4   71.0\n",
       "28        概率图模型  专业选修课   3   96.0\n",
       "30       高等统计选讲  专业选修课   3   87.0\n",
       "31         推荐系统  专业选修课   3   97.0\n",
       "32         智慧城市  专业选修课   3   77.0\n",
       "0       高等数学（一）  学科基础课   4   99.0\n",
       "1     数据科学与工程导论  学科基础课   3    NaN\n",
       "2          专业英语  学科基础课   2  100.0\n",
       "3           概率论  学科基础课   3   99.0\n",
       "4         计算机系统  专业必修课   4   80.0\n",
       "5     数据科学与工程算法  专业必修课   3   99.0\n",
       "6          操作系统  专业必修课   4   89.0\n",
       "7      分布式模型与编程  专业必修课   4   81.0\n",
       "8        数据管理系统  专业必修课   6   70.0\n",
       "9     统计与数据分析基础  专业必修课   4   82.0\n",
       "10         商业分析  专业必修课   3   70.0\n",
       "11         数据伦理  专业必修课   1    NaN\n",
       "12         设计思维  专业选修课   3   73.0\n",
       "13  统计分析软件（R语言）  专业选修课   3   83.0\n",
       "14         软件工程  专业选修课   3   91.0\n",
       "15    数据仓库与OLAP  专业选修课   3   94.0\n",
       "16      数据库系统实现  专业选修课   3   98.0\n",
       "17      大数据处理工具  专业选修课   3   73.0\n",
       "18         统计推断  专业选修课   3   75.0\n",
       "19         信息检索  专业选修课   3   93.0\n",
       "20        信息可视化  专业选修课   3   93.0\n",
       "21       自然语言处理  专业选修课   3   80.0\n",
       "22         统计学习  专业选修课   3   81.0\n",
       "23         计算广告  专业选修课   3   99.0\n",
       "24      前端设计与开发  专业选修课   3   90.0"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "common"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "special = pd.concat([df_a12,df_a22])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>课程名字</th>\n",
       "      <th>课程类别</th>\n",
       "      <th>学分</th>\n",
       "      <th>分数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>思想道德修养与法律基础</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>89.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>中国近代史纲要</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>97.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>网球（初）</td>\n",
       "      <td>体育类</td>\n",
       "      <td>1</td>\n",
       "      <td>81.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>极端性气候与陆地生态系统</td>\n",
       "      <td>公共任意选修类</td>\n",
       "      <td>2</td>\n",
       "      <td>78.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>游泳（初）</td>\n",
       "      <td>体育类</td>\n",
       "      <td>1</td>\n",
       "      <td>75.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>足球（初）</td>\n",
       "      <td>体育类</td>\n",
       "      <td>1</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>毛泽东思想和中国特色社会主义理论体系概论（一）</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>99.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>学术英语听说（一）</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>85.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>程序正义与经典案例解析</td>\n",
       "      <td>精品课程</td>\n",
       "      <td>2</td>\n",
       "      <td>76.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>环境可持续发展</td>\n",
       "      <td>公共任意选修类</td>\n",
       "      <td>2</td>\n",
       "      <td>76.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>毛泽东思想和中国特色社会主义理论体系概论（二）</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>93.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>篮球（初）</td>\n",
       "      <td>体育类</td>\n",
       "      <td>1</td>\n",
       "      <td>77.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>学术英语听说（二）</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>92.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>学术英语阅读</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>72.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>学术英语写作</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>98.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>美国社会与文化</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>77.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>马克思主义基本原理概论</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>95.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>军事理论</td>\n",
       "      <td>思政类</td>\n",
       "      <td>2</td>\n",
       "      <td>87.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>大学语文</td>\n",
       "      <td>文化传承类</td>\n",
       "      <td>2</td>\n",
       "      <td>100.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>现代交通地理学</td>\n",
       "      <td>公共任意选修类</td>\n",
       "      <td>2</td>\n",
       "      <td>88.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>植物王国</td>\n",
       "      <td>公共任意选修类</td>\n",
       "      <td>2</td>\n",
       "      <td>92.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       课程名字     课程类别  学分     分数\n",
       "0               思想道德修养与法律基础      思政类   3   89.0\n",
       "5                   中国近代史纲要      思政类   3   97.0\n",
       "8                     网球（初）      体育类   1   81.0\n",
       "10             极端性气候与陆地生态系统  公共任意选修类   2   78.0\n",
       "13                    游泳（初）      体育类   1   75.0\n",
       "18                    足球（初）      体育类   1  100.0\n",
       "20  毛泽东思想和中国特色社会主义理论体系概论（一）      思政类   3   99.0\n",
       "23                学术英语听说（一）      英语类   2   85.0\n",
       "24              程序正义与经典案例解析     精品课程   2   76.0\n",
       "25                  环境可持续发展  公共任意选修类   2   76.0\n",
       "27  毛泽东思想和中国特色社会主义理论体系概论（二）      思政类   3   93.0\n",
       "29                    篮球（初）      体育类   1   77.0\n",
       "25                学术英语听说（二）      英语类   2   92.0\n",
       "26                   学术英语阅读      英语类   2   72.0\n",
       "27                   学术英语写作      英语类   2   98.0\n",
       "28                  美国社会与文化      英语类   2   77.0\n",
       "29              马克思主义基本原理概论      思政类   3   95.0\n",
       "30                     军事理论      思政类   2   87.0\n",
       "31                     大学语文    文化传承类   2  100.0\n",
       "32                  现代交通地理学  公共任意选修类   2   88.0\n",
       "33                     植物王国  公共任意选修类   2   92.0"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "special"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "第三题：请直接读取两张表合并后拆分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "#先使用concat函数进行合并\n",
    "df = pd.concat([df1,df2])\n",
    "#query函数进行判断\n",
    "special2 = df.query('课程类别 in [\"学科基础课\",\"专业必修课\",\"专业选修课\"]')\n",
    "common2 = df.query('课程类别 not in [\"学科基础课\",\"专业必修课\",\"专业选修课\"]')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "第四题;专业课程中有缺失值吗，如果有的话请在完成(3)的同时，用组内（3种类型的专业课）均值填充缺失值后拆分。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 46 entries, 1 to 24\n",
      "Data columns (total 4 columns):\n",
      "课程名字    46 non-null object\n",
      "课程类别    46 non-null object\n",
      "学分      46 non-null int64\n",
      "分数      43 non-null float64\n",
      "dtypes: float64(1), int64(1), object(2)\n",
      "memory usage: 1.8+ KB\n"
     ]
    }
   ],
   "source": [
    "special2.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "#要掌握分组进行缺失值填充\n",
    "df['分数'] = df.groupby('课程类别').transform(lambda x: x.fillna(x.mean()))['分数']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>课程名字</th>\n",
       "      <th>课程类别</th>\n",
       "      <th>学分</th>\n",
       "      <th>分数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>思想道德修养与法律基础</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>89.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>云计算应用与开发</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>96.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>社会计算</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>78.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>深度学习</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>75.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>人工智能导论</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>中国近代史纲要</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>97.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>数据结构与算法</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>5</td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>开源软件设计与开发</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>81.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>网球（初）</td>\n",
       "      <td>体育类</td>\n",
       "      <td>1</td>\n",
       "      <td>81.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>大数据应用案例</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>91.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>极端性气候与陆地生态系统</td>\n",
       "      <td>公共任意选修类</td>\n",
       "      <td>2</td>\n",
       "      <td>78.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>电子商务</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>74.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>高等数学（二）</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>4</td>\n",
       "      <td>92.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>游泳（初）</td>\n",
       "      <td>体育类</td>\n",
       "      <td>1</td>\n",
       "      <td>75.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>计算机视觉</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>85.791667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>离散数学</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>4</td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>数据科学的数学基础</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>75.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>程序设计</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>4</td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>足球（初）</td>\n",
       "      <td>体育类</td>\n",
       "      <td>1</td>\n",
       "      <td>100.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>线性代数</td>\n",
       "      <td>学科基础课</td>\n",
       "      <td>3</td>\n",
       "      <td>97.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>毛泽东思想和中国特色社会主义理论体系概论（一）</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>99.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>机器学习</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>位置服务</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>87.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>学术英语听说（一）</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>85.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>程序正义与经典案例解析</td>\n",
       "      <td>精品课程</td>\n",
       "      <td>2</td>\n",
       "      <td>76.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>环境可持续发展</td>\n",
       "      <td>公共任意选修类</td>\n",
       "      <td>2</td>\n",
       "      <td>76.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>数据挖掘</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>71.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>毛泽东思想和中国特色社会主义理论体系概论（二）</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>93.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>概率图模型</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>96.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>篮球（初）</td>\n",
       "      <td>体育类</td>\n",
       "      <td>1</td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>计算机系统</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>数据科学与工程算法</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>99.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>操作系统</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>89.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>分布式模型与编程</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>81.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>数据管理系统</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>6</td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>统计与数据分析基础</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>4</td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>商业分析</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>3</td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>数据伦理</td>\n",
       "      <td>专业必修课</td>\n",
       "      <td>1</td>\n",
       "      <td>81.090909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>设计思维</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>73.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>统计分析软件（R语言）</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>83.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>软件工程</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>91.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>数据仓库与OLAP</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>94.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>数据库系统实现</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>98.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>大数据处理工具</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>73.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>统计推断</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>75.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>信息检索</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>93.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>信息可视化</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>93.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>自然语言处理</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>统计学习</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>81.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>计算广告</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>99.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>前端设计与开发</td>\n",
       "      <td>专业选修课</td>\n",
       "      <td>3</td>\n",
       "      <td>90.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>学术英语听说（二）</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>92.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>学术英语阅读</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>72.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>学术英语写作</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>98.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>美国社会与文化</td>\n",
       "      <td>英语类</td>\n",
       "      <td>2</td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>马克思主义基本原理概论</td>\n",
       "      <td>思政类</td>\n",
       "      <td>3</td>\n",
       "      <td>95.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>军事理论</td>\n",
       "      <td>思政类</td>\n",
       "      <td>2</td>\n",
       "      <td>87.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>大学语文</td>\n",
       "      <td>文化传承类</td>\n",
       "      <td>2</td>\n",
       "      <td>100.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>现代交通地理学</td>\n",
       "      <td>公共任意选修类</td>\n",
       "      <td>2</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>植物王国</td>\n",
       "      <td>公共任意选修类</td>\n",
       "      <td>2</td>\n",
       "      <td>92.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>67 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                       课程名字     课程类别  学分          分数\n",
       "0               思想道德修养与法律基础      思政类   3   89.000000\n",
       "1                  云计算应用与开发    专业选修课   3   96.000000\n",
       "2                      社会计算    专业选修课   3   78.000000\n",
       "3                      深度学习    专业选修课   3   75.000000\n",
       "4                    人工智能导论    专业必修课   3   84.000000\n",
       "5                   中国近代史纲要      思政类   3   97.000000\n",
       "6                   数据结构与算法    学科基础课   5   82.000000\n",
       "7                 开源软件设计与开发    专业选修课   3   81.000000\n",
       "8                     网球（初）      体育类   1   81.000000\n",
       "9                   大数据应用案例    专业必修课   3   91.000000\n",
       "10             极端性气候与陆地生态系统  公共任意选修类   2   78.000000\n",
       "11                     电子商务    专业选修课   3   74.000000\n",
       "12                  高等数学（二）    学科基础课   4   92.000000\n",
       "13                    游泳（初）      体育类   1   75.000000\n",
       "14                    计算机视觉    专业选修课   3   85.791667\n",
       "15                     离散数学    学科基础课   4   82.000000\n",
       "16                数据科学的数学基础    专业必修课   4   75.000000\n",
       "17                     程序设计    学科基础课   4   84.000000\n",
       "18                    足球（初）      体育类   1  100.000000\n",
       "19                     线性代数    学科基础课   3   97.000000\n",
       "20  毛泽东思想和中国特色社会主义理论体系概论（一）      思政类   3   99.000000\n",
       "21                     机器学习    专业选修课   3   88.000000\n",
       "22                     位置服务    专业选修课   3   87.000000\n",
       "23                学术英语听说（一）      英语类   2   85.000000\n",
       "24              程序正义与经典案例解析     精品课程   2   76.000000\n",
       "25                  环境可持续发展  公共任意选修类   2   76.000000\n",
       "26                     数据挖掘    专业必修课   4   71.000000\n",
       "27  毛泽东思想和中国特色社会主义理论体系概论（二）      思政类   3   93.000000\n",
       "28                    概率图模型    专业选修课   3   96.000000\n",
       "29                    篮球（初）      体育类   1   77.000000\n",
       "..                      ...      ...  ..         ...\n",
       "4                     计算机系统    专业必修课   4   80.000000\n",
       "5                 数据科学与工程算法    专业必修课   3   99.000000\n",
       "6                      操作系统    专业必修课   4   89.000000\n",
       "7                  分布式模型与编程    专业必修课   4   81.000000\n",
       "8                    数据管理系统    专业必修课   6   70.000000\n",
       "9                 统计与数据分析基础    专业必修课   4   82.000000\n",
       "10                     商业分析    专业必修课   3   70.000000\n",
       "11                     数据伦理    专业必修课   1   81.090909\n",
       "12                     设计思维    专业选修课   3   73.000000\n",
       "13              统计分析软件（R语言）    专业选修课   3   83.000000\n",
       "14                     软件工程    专业选修课   3   91.000000\n",
       "15                数据仓库与OLAP    专业选修课   3   94.000000\n",
       "16                  数据库系统实现    专业选修课   3   98.000000\n",
       "17                  大数据处理工具    专业选修课   3   73.000000\n",
       "18                     统计推断    专业选修课   3   75.000000\n",
       "19                     信息检索    专业选修课   3   93.000000\n",
       "20                    信息可视化    专业选修课   3   93.000000\n",
       "21                   自然语言处理    专业选修课   3   80.000000\n",
       "22                     统计学习    专业选修课   3   81.000000\n",
       "23                     计算广告    专业选修课   3   99.000000\n",
       "24                  前端设计与开发    专业选修课   3   90.000000\n",
       "25                学术英语听说（二）      英语类   2   92.000000\n",
       "26                   学术英语阅读      英语类   2   72.000000\n",
       "27                   学术英语写作      英语类   2   98.000000\n",
       "28                  美国社会与文化      英语类   2   77.000000\n",
       "29              马克思主义基本原理概论      思政类   3   95.000000\n",
       "30                     军事理论      思政类   2   87.000000\n",
       "31                     大学语文    文化传承类   2  100.000000\n",
       "32                  现代交通地理学  公共任意选修类   2   88.000000\n",
       "33                     植物王国  公共任意选修类   2   92.000000\n",
       "\n",
       "[67 rows x 4 columns]"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "然后再进行分组，步骤同上"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
